Pycaret#

1. Identifikasi Outlier#

Pada tahap data understanding, tambahkan identifikasi outlier.

# Install library PyOD (untuk ABOD)
!pip install pyod

# Import library
import pandas as pd
import matplotlib.pyplot as plt
from pyod.models.abod import ABOD

# 1. Baca data CSV
df = pd.read_csv("Iris(1).csv")   # ganti path jika perlu
print(df.head())

# 2. Ambil fitur numerik (buang kolom non-numerik seperti 'Species')
X = df.drop(columns=['Species', 'Id'])

# 3. Jalankan ABOD
clf = ABOD(contamination=0.05)  # 5% diasumsikan outlier
clf.fit(X)

# 4. Prediksi outlier
df['outlier'] = clf.predict(X)   # 0 = normal, 1 = outlier
df['score'] = clf.decision_function(X)  # skor outlier

print(df.head())

# 5. Visualisasi (contoh: SepalLengthCm vs SepalWidthCm)
plt.figure(figsize=(8,6))
plt.scatter(df['SepalLengthCm'], df['SepalWidthCm'],
            c=df['outlier'], cmap='coolwarm', edgecolor='k')
plt.xlabel("SepalLengthCm")
plt.ylabel("SepalWidthCm")
plt.title("Deteksi Outlier dengan ABOD (Iris Dataset)")
plt.show()
Requirement already satisfied: pyod in /usr/local/python/3.12.1/lib/python3.12/site-packages (2.0.5)
Requirement already satisfied: joblib in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pyod) (1.3.2)
Requirement already satisfied: matplotlib in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pyod) (3.7.5)
Requirement already satisfied: numpy>=1.19 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pyod) (1.26.4)
Requirement already satisfied: numba>=0.51 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pyod) (0.61.2)
Requirement already satisfied: scipy>=1.5.1 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pyod) (1.11.4)
Requirement already satisfied: scikit-learn>=0.22.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pyod) (1.4.2)
Requirement already satisfied: llvmlite<0.45,>=0.44.0dev0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from numba>=0.51->pyod) (0.44.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /home/codespace/.local/lib/python3.12/site-packages (from scikit-learn>=0.22.0->pyod) (3.6.0)
Requirement already satisfied: contourpy>=1.0.1 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib->pyod) (1.3.2)
Requirement already satisfied: cycler>=0.10 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib->pyod) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib->pyod) (4.58.5)
Requirement already satisfied: kiwisolver>=1.0.1 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib->pyod) (1.4.8)
Requirement already satisfied: packaging>=20.0 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib->pyod) (25.0)
Requirement already satisfied: pillow>=6.2.0 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib->pyod) (11.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib->pyod) (3.2.3)
Requirement already satisfied: python-dateutil>=2.7 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib->pyod) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in /home/codespace/.local/lib/python3.12/site-packages (from python-dateutil>=2.7->matplotlib->pyod) (1.17.0)
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species  \
0   1            5.1           3.5            1.4           0.2  Iris-setosa   
1   2            4.9           3.0            1.4           0.2  Iris-setosa   
2   3            4.7           3.2            1.3           0.2  Iris-setosa   
3   4            4.6           3.1            1.5           0.2  Iris-setosa   
4   5            5.0           3.6            1.4           0.2  Iris-setosa   

   outlier       score  
0        0 -295.138889  
1        0 -339.506173  
2        0  -70.492908  
3        0 -153.472222  
4        0  -67.661180  
_images/f827229d3a3cd29c577acc81187322c92c246c1e0d11e783810ac88ec1f45ec5.png
!python3 --version
Python 3.12.1

Install PyCaret untuk melakukan preprocessing data Iris

pip install pycaret
Requirement already satisfied: pycaret in /usr/local/python/3.12.1/lib/python3.12/site-packages (3.3.2)
Requirement already satisfied: ipython>=5.5.0 in /home/codespace/.local/lib/python3.12/site-packages (from pycaret) (9.4.0)
Requirement already satisfied: ipywidgets>=7.6.5 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (8.1.7)
Requirement already satisfied: tqdm>=4.62.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (4.67.1)
Requirement already satisfied: numpy<1.27,>=1.21 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (1.26.4)
Requirement already satisfied: pandas<2.2.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (2.1.4)
Requirement already satisfied: jinja2>=3 in /home/codespace/.local/lib/python3.12/site-packages (from pycaret) (3.1.6)
Requirement already satisfied: scipy<=1.11.4,>=1.6.1 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (1.11.4)
Requirement already satisfied: joblib<1.4,>=1.2.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (1.3.2)
Requirement already satisfied: scikit-learn>1.4.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (1.4.2)
Requirement already satisfied: pyod>=1.1.3 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (2.0.5)
Requirement already satisfied: imbalanced-learn>=0.12.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (0.14.0)
Requirement already satisfied: category-encoders>=2.4.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (2.7.0)
Requirement already satisfied: lightgbm>=3.0.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (4.6.0)
Requirement already satisfied: numba>=0.55.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (0.61.2)
Requirement already satisfied: requests>=2.27.1 in /home/codespace/.local/lib/python3.12/site-packages (from pycaret) (2.32.4)
Requirement already satisfied: psutil>=5.9.0 in /home/codespace/.local/lib/python3.12/site-packages (from pycaret) (7.0.0)
Requirement already satisfied: markupsafe>=2.0.1 in /home/codespace/.local/lib/python3.12/site-packages (from pycaret) (3.0.2)
Requirement already satisfied: importlib-metadata>=4.12.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (8.7.0)
Requirement already satisfied: nbformat>=4.2.0 in /home/codespace/.local/lib/python3.12/site-packages (from pycaret) (5.10.4)
Requirement already satisfied: cloudpickle in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (3.1.1)
Requirement already satisfied: deprecation>=2.1.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (2.1.0)
Requirement already satisfied: xxhash in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (3.5.0)
Requirement already satisfied: matplotlib<3.8.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (3.7.5)
Requirement already satisfied: scikit-plot>=0.3.7 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (0.3.7)
Requirement already satisfied: yellowbrick>=1.4 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (1.5)
Requirement already satisfied: plotly>=5.14.0 in /home/codespace/.local/lib/python3.12/site-packages (from pycaret) (6.2.0)
Requirement already satisfied: kaleido>=0.2.1 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (1.1.0)
Requirement already satisfied: schemdraw==0.15 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (0.15)
Requirement already satisfied: plotly-resampler>=0.8.3.1 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (0.11.0)
Requirement already satisfied: statsmodels>=0.12.1 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (0.14.5)
Requirement already satisfied: sktime==0.26.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (0.26.0)
Requirement already satisfied: tbats>=1.1.3 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (1.1.3)
Requirement already satisfied: pmdarima>=2.0.4 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (2.0.4)
Requirement already satisfied: wurlitzer in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pycaret) (3.1.1)
Requirement already satisfied: packaging in /home/codespace/.local/lib/python3.12/site-packages (from sktime==0.26.0->pycaret) (25.0)
Requirement already satisfied: scikit-base<0.8.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from sktime==0.26.0->pycaret) (0.7.8)
Requirement already satisfied: contourpy>=1.0.1 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib<3.8.0->pycaret) (1.3.2)
Requirement already satisfied: cycler>=0.10 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib<3.8.0->pycaret) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib<3.8.0->pycaret) (4.58.5)
Requirement already satisfied: kiwisolver>=1.0.1 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib<3.8.0->pycaret) (1.4.8)
Requirement already satisfied: pillow>=6.2.0 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib<3.8.0->pycaret) (11.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib<3.8.0->pycaret) (3.2.3)
Requirement already satisfied: python-dateutil>=2.7 in /home/codespace/.local/lib/python3.12/site-packages (from matplotlib<3.8.0->pycaret) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /home/codespace/.local/lib/python3.12/site-packages (from pandas<2.2.0->pycaret) (2025.2)
Requirement already satisfied: tzdata>=2022.1 in /home/codespace/.local/lib/python3.12/site-packages (from pandas<2.2.0->pycaret) (2025.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in /home/codespace/.local/lib/python3.12/site-packages (from scikit-learn>1.4.0->pycaret) (3.6.0)
Requirement already satisfied: patsy>=0.5.1 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from category-encoders>=2.4.0->pycaret) (1.0.1)
Requirement already satisfied: zipp>=3.20 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from importlib-metadata>=4.12.0->pycaret) (3.23.0)
Requirement already satisfied: decorator in /home/codespace/.local/lib/python3.12/site-packages (from ipython>=5.5.0->pycaret) (5.2.1)
Requirement already satisfied: ipython-pygments-lexers in /home/codespace/.local/lib/python3.12/site-packages (from ipython>=5.5.0->pycaret) (1.1.1)
Requirement already satisfied: jedi>=0.16 in /home/codespace/.local/lib/python3.12/site-packages (from ipython>=5.5.0->pycaret) (0.19.2)
Requirement already satisfied: matplotlib-inline in /home/codespace/.local/lib/python3.12/site-packages (from ipython>=5.5.0->pycaret) (0.1.7)
Requirement already satisfied: pexpect>4.3 in /home/codespace/.local/lib/python3.12/site-packages (from ipython>=5.5.0->pycaret) (4.9.0)
Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in /home/codespace/.local/lib/python3.12/site-packages (from ipython>=5.5.0->pycaret) (3.0.51)
Requirement already satisfied: pygments>=2.4.0 in /home/codespace/.local/lib/python3.12/site-packages (from ipython>=5.5.0->pycaret) (2.19.2)
Requirement already satisfied: stack_data in /home/codespace/.local/lib/python3.12/site-packages (from ipython>=5.5.0->pycaret) (0.6.3)
Requirement already satisfied: traitlets>=5.13.0 in /home/codespace/.local/lib/python3.12/site-packages (from ipython>=5.5.0->pycaret) (5.14.3)
Requirement already satisfied: wcwidth in /home/codespace/.local/lib/python3.12/site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=5.5.0->pycaret) (0.2.13)
Requirement already satisfied: comm>=0.1.3 in /home/codespace/.local/lib/python3.12/site-packages (from ipywidgets>=7.6.5->pycaret) (0.2.2)
Requirement already satisfied: widgetsnbextension~=4.0.14 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from ipywidgets>=7.6.5->pycaret) (4.0.14)
Requirement already satisfied: jupyterlab_widgets~=3.0.15 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from ipywidgets>=7.6.5->pycaret) (3.0.15)
Requirement already satisfied: parso<0.9.0,>=0.8.4 in /home/codespace/.local/lib/python3.12/site-packages (from jedi>=0.16->ipython>=5.5.0->pycaret) (0.8.4)
Requirement already satisfied: choreographer>=1.0.10 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from kaleido>=0.2.1->pycaret) (1.0.10)
Requirement already satisfied: logistro>=1.0.8 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from kaleido>=0.2.1->pycaret) (1.1.0)
Requirement already satisfied: orjson>=3.10.15 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from kaleido>=0.2.1->pycaret) (3.11.3)
Requirement already satisfied: pytest-timeout>=2.4.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from kaleido>=0.2.1->pycaret) (2.4.0)
Requirement already satisfied: simplejson>=3.19.3 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from choreographer>=1.0.10->kaleido>=0.2.1->pycaret) (3.20.1)
Requirement already satisfied: fastjsonschema>=2.15 in /home/codespace/.local/lib/python3.12/site-packages (from nbformat>=4.2.0->pycaret) (2.21.1)
Requirement already satisfied: jsonschema>=2.6 in /home/codespace/.local/lib/python3.12/site-packages (from nbformat>=4.2.0->pycaret) (4.24.0)
Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /home/codespace/.local/lib/python3.12/site-packages (from nbformat>=4.2.0->pycaret) (5.8.1)
Requirement already satisfied: attrs>=22.2.0 in /home/codespace/.local/lib/python3.12/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (25.3.0)
Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/codespace/.local/lib/python3.12/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (2025.4.1)
Requirement already satisfied: referencing>=0.28.4 in /home/codespace/.local/lib/python3.12/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (0.36.2)
Requirement already satisfied: rpds-py>=0.7.1 in /home/codespace/.local/lib/python3.12/site-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (0.26.0)
Requirement already satisfied: platformdirs>=2.5 in /home/codespace/.local/lib/python3.12/site-packages (from jupyter-core!=5.0.*,>=4.12->nbformat>=4.2.0->pycaret) (4.3.8)
Requirement already satisfied: llvmlite<0.45,>=0.44.0dev0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from numba>=0.55.0->pycaret) (0.44.0)
Requirement already satisfied: ptyprocess>=0.5 in /home/codespace/.local/lib/python3.12/site-packages (from pexpect>4.3->ipython>=5.5.0->pycaret) (0.7.0)
Requirement already satisfied: narwhals>=1.15.1 in /home/codespace/.local/lib/python3.12/site-packages (from plotly>=5.14.0->pycaret) (1.46.0)
Requirement already satisfied: dash>=2.11.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from plotly-resampler>=0.8.3.1->pycaret) (3.2.0)
Requirement already satisfied: tsdownsample>=0.1.3 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from plotly-resampler>=0.8.3.1->pycaret) (0.1.4.1)
Requirement already satisfied: Flask<3.2,>=1.0.4 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from dash>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (3.1.2)
Requirement already satisfied: Werkzeug<3.2 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from dash>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (3.1.3)
Requirement already satisfied: typing-extensions>=4.1.1 in /home/codespace/.local/lib/python3.12/site-packages (from dash>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (4.14.1)
Requirement already satisfied: retrying in /usr/local/python/3.12.1/lib/python3.12/site-packages (from dash>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (1.4.2)
Requirement already satisfied: nest-asyncio in /home/codespace/.local/lib/python3.12/site-packages (from dash>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (1.6.0)
Requirement already satisfied: setuptools in /home/codespace/.local/lib/python3.12/site-packages (from dash>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (80.9.0)
Requirement already satisfied: blinker>=1.9.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from Flask<3.2,>=1.0.4->dash>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (1.9.0)
Requirement already satisfied: click>=8.1.3 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from Flask<3.2,>=1.0.4->dash>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (8.2.1)
Requirement already satisfied: itsdangerous>=2.2.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from Flask<3.2,>=1.0.4->dash>=2.11.0->plotly-resampler>=0.8.3.1->pycaret) (2.2.0)
Requirement already satisfied: Cython!=0.29.18,!=0.29.31,>=0.29 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pmdarima>=2.0.4->pycaret) (3.1.3)
Requirement already satisfied: urllib3 in /home/codespace/.local/lib/python3.12/site-packages (from pmdarima>=2.0.4->pycaret) (2.5.0)
Requirement already satisfied: pytest>=7.0.0 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pytest-timeout>=2.4.0->kaleido>=0.2.1->pycaret) (8.4.2)
Requirement already satisfied: iniconfig>=1 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido>=0.2.1->pycaret) (2.1.0)
Requirement already satisfied: pluggy<2,>=1.5 in /usr/local/python/3.12.1/lib/python3.12/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido>=0.2.1->pycaret) (1.6.0)
Requirement already satisfied: six>=1.5 in /home/codespace/.local/lib/python3.12/site-packages (from python-dateutil>=2.7->matplotlib<3.8.0->pycaret) (1.17.0)
Requirement already satisfied: charset_normalizer<4,>=2 in /home/codespace/.local/lib/python3.12/site-packages (from requests>=2.27.1->pycaret) (3.4.2)
Requirement already satisfied: idna<4,>=2.5 in /home/codespace/.local/lib/python3.12/site-packages (from requests>=2.27.1->pycaret) (3.10)
Requirement already satisfied: certifi>=2017.4.17 in /home/codespace/.local/lib/python3.12/site-packages (from requests>=2.27.1->pycaret) (2025.7.9)
Requirement already satisfied: executing>=1.2.0 in /home/codespace/.local/lib/python3.12/site-packages (from stack_data->ipython>=5.5.0->pycaret) (2.2.0)
Requirement already satisfied: asttokens>=2.1.0 in /home/codespace/.local/lib/python3.12/site-packages (from stack_data->ipython>=5.5.0->pycaret) (3.0.0)
Requirement already satisfied: pure-eval in /home/codespace/.local/lib/python3.12/site-packages (from stack_data->ipython>=5.5.0->pycaret) (0.2.3)
Note: you may need to restart the kernel to use updated packages.

Cek versi Python dan PyCaret agar tidak terjadi kesalahan saat melakukan preprocessing

import sys
import pycaret

# Report the interpreter and PyCaret versions in use.
print(f"Python version: {sys.version}")
print(f"PyCaret version: {pycaret.__version__}")
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[4], line 2
      1 import sys
----> 2 import pycaret
      4 print("Python version:", sys.version)
      5 print("PyCaret version:", pycaret.__version__)

File /usr/local/python/3.12.1/lib/python3.12/site-packages/pycaret/__init__.py:22
     16     raise RuntimeError(
     17         "Pycaret only supports python 3.9, 3.10, 3.11. Your actual Python version: ",
     18         sys.version_info,
     19         "Please UPGRADE your Python version.",
     20     )
     21 elif sys.version_info >= (3, 12):
---> 22     raise RuntimeError(
     23         "Pycaret only supports python 3.9, 3.10, 3.11. Your actual Python version: ",
     24         sys.version_info,
     25         "Please DOWNGRADE your Python version.",
     26     )

RuntimeError: ('Pycaret only supports python 3.9, 3.10, 3.11. Your actual Python version: ', sys.version_info(major=3, minor=12, micro=1, releaselevel='final', serial=0), 'Please DOWNGRADE your Python version.')

2. Tambahkan menu preprocessing yang berisi penghapusan outlier menggunakan 3 metode#

Import Pandas untuk membaca data sebelum dilakukan preprocessing

import pandas as pd
from pycaret.datasets import get_data

# Load the Iris CSV and show its dimensions plus the first rows.
df = pd.read_csv("Iris(1).csv")
print(f"Shape: {df.shape}")
print(df.head())
Shape: (150, 6)
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

Preprocessing menggunakan pycaret dengan 3 metode KNN, Isolation Forest, LOF

from pycaret.anomaly import setup, create_model, assign_model
import pandas as pd

# Read the Iris data from disk
df = pd.read_csv('Iris(1).csv')

# Only the numeric measurement columns are handed to the detectors
df_num = df.drop(columns=['Id', 'Species'])

# Initialise the PyCaret anomaly-detection experiment (3.x API) with the
# extra console, profiling and HTML output switched off
s = setup(data=df_num, session_id=123, verbose=False, profile=False, html=False)

# --- 1. KNN: fit, flag every row, keep the rows flagged 0 ---
knn = create_model('knn')
knn_results = assign_model(knn)
knn_inliers = knn_results['Anomaly'] == 0
df_knn_clean = df[knn_inliers]

# --- 2. Isolation Forest: same procedure ---
iforest = create_model('iforest')
iforest_results = assign_model(iforest)
iforest_inliers = iforest_results['Anomaly'] == 0
df_iforest_clean = df[iforest_inliers]

# --- 3. LOF: same procedure ---
lof = create_model('lof')
lof_results = assign_model(lof)
lof_inliers = lof_results['Anomaly'] == 0
df_lof_clean = df[lof_inliers]

# Row counts before and after each filter
print(f"🔹 Jumlah data asli: {len(df)}")
print(f"🔹 Setelah KNN: {len(df_knn_clean)}")
print(f"🔹 Setelah Isolation Forest: {len(df_iforest_clean)}")
print(f"🔹 Setelah LOF: {len(df_lof_clean)}")
                                                         
🔹 Jumlah data asli: 150
🔹 Setelah KNN: 142
🔹 Setelah Isolation Forest: 142
🔹 Setelah LOF: 142

Visualisasikan data yang sudah melalui preprocessing dan tampilkan hasil output dari setiap metode

import sys
import pycaret

# Print the interpreter and PyCaret versions.
print(f"Python: {sys.version}")
print(f"PyCaret: {pycaret.__version__}")
Python: 3.11.13 (main, Jun  4 2025, 08:57:30) [GCC 13.3.0]
PyCaret: 3.3.2
import pandas as pd

# Load the Iris CSV and discard the Id column when it is present
iris = pd.read_csv("Iris(1).csv").drop(columns=["Id"], errors="ignore")

# Restrict the frame to the four measurement columns
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
iris_num = iris[feature_columns]

print(iris_num.dtypes)
iris_num.head()
SepalLengthCm    float64
SepalWidthCm     float64
PetalLengthCm    float64
PetalWidthCm     float64
dtype: object
SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
from pycaret.anomaly import setup

# Start the anomaly-detection experiment on the numeric Iris columns,
# passing a fixed session id.
exp = setup(data=iris_num, session_id=123)
  Description Value
0 Session id 123
1 Original data shape (150, 4)
2 Transformed data shape (150, 4)
3 Numeric features 4
4 Preprocess True
5 Imputation type simple
6 Numeric imputation mean
7 Categorical imputation mode
8 CPU Jobs -1
9 Use GPU False
10 Log Experiment False
11 Experiment Name anomaly-default-name
12 USI 0163
from pycaret.anomaly import create_model, assign_model

# Detector 1: Isolation Forest -- fit, then attach the anomaly flags
iforest = create_model('iforest')
out_iforest = assign_model(iforest)

# Detector 2: KNN
knn = create_model('knn')
out_knn = assign_model(knn)

# Detector 3: LOF
lof = create_model('lof')
out_lof = assign_model(lof)

# The 'Anomaly' column holds 0/1 flags, so its sum is the outlier count
iforest_count = out_iforest['Anomaly'].sum()
knn_count = out_knn['Anomaly'].sum()
lof_count = out_lof['Anomaly'].sum()

print(f"Isolation Forest outliers: {iforest_count}")
print(f"KNN outliers: {knn_count}")
print(f"LOF outliers: {lof_count}")
Isolation Forest outliers: 8
KNN outliers: 8
LOF outliers: 8
import plotly.io as pio
pio.renderers.default = "notebook"   # renderer for VS Code / Jupyter
# Alternative: use the "iframe_connected" renderer to force iframe output.
import plotly.express as px


def _plot_outliers_3d(results, title):
    """Build a 3D scatter of an anomaly-detection result.

    Parameters
    ----------
    results : pandas.DataFrame
        Frame with the columns 'SepalLengthCm', 'SepalWidthCm',
        'PetalLengthCm' and an 'Anomaly' column holding 0 (normal) or
        1 (outlier). The frame is not modified.
    title : str
        Figure title.

    Returns
    -------
    The Plotly figure, with normal points drawn as circles and outliers
    drawn as 'x' markers.
    """
    # One label column drives both colour and symbol, so each class gets a
    # single legend entry.
    status = results['Anomaly'].map({0: 'Normal', 1: 'Outlier'})
    fig = px.scatter_3d(
        results.assign(Status=status),
        x='SepalLengthCm',
        y='SepalWidthCm',
        z='PetalLengthCm',
        color='Status',
        symbol='Status',
        # Values passed to `symbol` are category labels, not marker names;
        # the explicit map is what makes outliers render as 'x'.
        symbol_map={'Normal': 'circle', 'Outlier': 'x'},
        opacity=0.7,
        title=title,
    )
    fig.update_traces(marker=dict(size=6))
    return fig


# === Isolation Forest ===
df_iforest = out_iforest.copy()
fig_iforest = _plot_outliers_3d(df_iforest, "3D Outlier Detection (Isolation Forest)")
fig_iforest.show()

# === KNN ===
df_knn = out_knn.copy()
fig_knn = _plot_outliers_3d(df_knn, "3D Outlier Detection (KNN)")
fig_knn.show()

# === LOF ===
df_lof = out_lof.copy()
fig_lof = _plot_outliers_3d(df_lof, "3D Outlier Detection (LOF)")
fig_lof.show()